#!/usr/bin/env python
# -*- coding:utf-8 -*-

# Default input file read by the preprocessing step. The path is relative,
# so it is resolved against the current working directory when opened.
source_path = '../data/train.txt'
# Default output file; it is opened in write mode and therefore overwritten.
out_path = '../data/train_new.txt'

import re

# Single-character marker tokens. Nothing in the visible part of this file
# reads them -- presumably sequence begin/end markers used by other code;
# confirm against the consumers before changing.
start_token, end_token = 'B', 'E'

def papre(source=None, out=None, encoding=None):
    """Rewrite a text file as space-separated characters, one fragment per line.

    Each input line is split on whitespace, and every resulting token is
    split again on the full-width question mark ("？"). Every non-empty
    fragment is written to the output as its characters joined by single
    spaces, followed by a newline. After all fragments of an input line
    (including an input line that yields none) one empty line is written.

    Args:
        source: Path of the file to read. Defaults to the module-level
            ``source_path``.
        out: Path of the file to write (overwritten). Defaults to the
            module-level ``out_path``.
        encoding: Text encoding used for both files. ``None`` keeps the
            platform default, which is what a plain ``open(path)`` uses.

    Raises:
        OSError: If either file cannot be opened.
    """
    # Resolve the defaults at call time so callers that rebind the module
    # constants after import still get the paths they expect.
    if source is None:
        source = source_path
    if out is None:
        out = out_path

    # The context manager guarantees both handles are flushed and closed,
    # even if an error interrupts processing half-way through.
    with open(source, encoding=encoding) as src, \
            open(out, "w", encoding=encoding) as dst:
        for line in src:
            # split() with no argument already discards leading/trailing
            # whitespace (including the newline), so no explicit strip is
            # needed and the tokens themselves contain no whitespace.
            for sent in line.split():
                for piece in sent.split("？"):
                    if piece:
                        # Joining a string iterates over its characters.
                        dst.write(" ".join(piece) + "\n")
            # Blank separator line after every input line.
            dst.write("\n")


# Run the preprocessing step only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    papre()

